In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time
Load data:
In [2]:
train_raw = pd.read_csv('train.csv')
test_raw = pd.read_csv('test.csv')
train_raw.info()
test_raw.info()
In [3]:
x_train = np.array(train_raw.drop(['label'], axis=1)) # Each row is a data point
# Normalization:
#x_train_mean = np.mean(x_train, axis=0)
#x_train_std = np.std(x_train, axis=0)
#x_train = (x_train - x_train_mean) / x_train_std
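A simpler alternative to the z-score normalization above would be to scale the raw pixel intensities (assumed to be 8-bit values in 0-255) into [0, 1]; left commented out here as a sketch, like the normalization above:
#x_train = x_train.astype(np.float32) / 255.0  # optional: rescale pixels to [0, 1]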
In [4]:
y_train = np.array(train_raw['label']).reshape(-1)
y_train = np.eye(10)[y_train] # Make it one hot
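A quick illustration of the np.eye one-hot trick on a few made-up labels (purely illustrative, not part of the pipeline):
example_labels = np.array([3, 0, 7])   # hypothetical digit labels
print(np.eye(10)[example_labels])      # each label index selects the matching row of the 10x10 identity matrix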
In [5]:
# Configure multithreading:
config = tf.ConfigProto(device_count={"CPU": 4},         # use 4 CPU cores
                        inter_op_parallelism_threads=4,   # threads for running independent ops in parallel
                        intra_op_parallelism_threads=32,  # threads for parallelism within a single op
                        log_device_placement=True)
Logistic Regression:
In [6]:
# Note: the convention here is y = xW + b, where each row of x holds one sample
x = tf.placeholder(tf.float32, shape=[None, 784])  # None lets the batch dimension take any size
y_ = tf.placeholder(tf.float32, shape=[None, 10])
W = tf.Variable(tf.zeros([784,10]), dtype=tf.float32)
b = tf.Variable(tf.zeros([10]), dtype=tf.float32)
y = tf.matmul(x, W) + b
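As a quick sanity check on the graph shapes (a minimal sketch; it does not affect training):
print(x.get_shape())   # (?, 784) -- batch size left unspecified
print(y.get_shape())   # (?, 10)  -- one logit per digit class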
In [7]:
# Use cross entropy + l2 as loss function
l2_reg_logi = tf.nn.l2_loss(W)
cross_entropy_logi = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
loss_logi = cross_entropy_logi + 0.01 * l2_reg_logi
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss_logi)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
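The accuracy node just compares argmaxes; the same computation in plain NumPy on a toy batch looks like this (purely illustrative values):
toy_logits = np.array([[2.0, 0.1, -1.0], [0.3, 0.2, 0.9]])   # hypothetical scores for 3 classes
toy_labels = np.array([[1, 0, 0], [0, 1, 0]])                # one-hot ground truth
correct = np.argmax(toy_logits, axis=1) == np.argmax(toy_labels, axis=1)
print(correct.astype(np.float32).mean())                     # 0.5: one of the two predictions matches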
In [8]:
# Train for 10 epochs using mini-batches of 100 examples from the training data
run_time = time.time()
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())  # Initialization!
    train_size = x_train.shape[0]
    for epoch in range(10):
        i = 0
        while i < train_size:
            if i + 100 < train_size:
                train_step.run(feed_dict={x: x_train[i:i+100, :], y_: y_train[i:i+100]})
            else:
                train_step.run(feed_dict={x: x_train[i:train_size, :], y_: y_train[i:train_size]})
            i += 100  # batch size = 100
    print('Logistic Regression training accuracy: ', accuracy.eval(feed_dict={x: x_train, y_: y_train}))
run_time = time.time() - run_time
print('Logistic Regression total running time: ', run_time)
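The manual index arithmetic in the batching loop above could also be written as a small generator; a sketch of an equivalent helper (hypothetical name iterate_minibatches), shown for reference only:
def iterate_minibatches(features, labels, batch_size=100):
    # Yield consecutive (features, labels) slices; the last batch may be smaller.
    for start in range(0, features.shape[0], batch_size):
        yield features[start:start + batch_size], labels[start:start + batch_size]

# Inside the session the training loop would then read:
# for batch_x, batch_y in iterate_minibatches(x_train, y_train):
#     train_step.run(feed_dict={x: batch_x, y_: batch_y})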
Convolutional Neural Network:
In [9]:
# We use a CNN with 3 hidden conv layers, each followed by a max pooling layer, then a fully connected layer
# First conv layer (3 * 3 * 32)
x_image = tf.reshape(x, [-1, 28, 28, 1]) # Reshape x to a rank 4 tensor: (# images) * 28H * 28W * 1Channel
# Note: -1 means the size of that dimension is inferred so the total number of elements stays the same
W_conv1 = tf.Variable(tf.truncated_normal([3, 3, 1, 32], stddev=0.1))  # Small random init to break symmetry
b_conv1 = tf.Variable(tf.truncated_normal([32], stddev=0.1))
h_conv1 = tf.nn.relu(tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1)
# First max pooling layer (2 * 2) with stride 2
h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')  # 2x2 pooling with stride 2 halves H and W
In [10]:
# Second conv layer (3 * 3 * 64)
W_conv2 = tf.Variable(tf.truncated_normal([3, 3, 32, 64], stddev=0.1))
b_conv2 = tf.Variable(tf.truncated_normal([64], stddev=0.1))
h_conv2 = tf.nn.relu(tf.nn.conv2d(h_pool1, W_conv2, strides=[1, 1, 1, 1], padding='SAME') + b_conv2)
# Second max pooling layer (2 * 2) with stride 2
h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
In [11]:
# Third conv layer (5 * 5 * 128)
W_conv3 = tf.Variable(tf.truncated_normal([5, 5, 64, 128], stddev=0.1))
b_conv3 = tf.Variable(tf.truncated_normal([128], stddev=0.1))
h_conv3 = tf.nn.relu(tf.nn.conv2d(h_pool2, W_conv3, strides=[1, 1, 1, 1], padding='SAME') + b_conv3)
# Third max pooling layer (2 * 2) with stride 1
h_pool3 = tf.nn.max_pool(h_conv3, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME')
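Tracing the spatial dimensions through the three pooling layers explains the 7 * 7 * 128 figure used by the fully connected layer below: 28x28 -> 14x14 (pool1, stride 2) -> 7x7 (pool2, stride 2) -> 7x7 (pool3, stride 1, SAME padding). A quick check of the static shapes:
print(h_pool1.get_shape())   # (?, 14, 14, 32)
print(h_pool2.get_shape())   # (?, 7, 7, 64)
print(h_pool3.get_shape())   # (?, 7, 7, 128)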
In [12]:
# Fully connected layer with 1024 neurons
W_fc1 = tf.Variable(tf.truncated_normal([7 * 7 * 128, 1024], stddev=0.1))
b_fc1 = tf.Variable(tf.truncated_normal([1024], stddev=0.1))
h_pool3_flat = tf.reshape(h_pool3, [-1, 7 * 7 * 128])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
In [13]:
# Dropout to reduce overfitting
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
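tf.nn.dropout uses inverted dropout: kept activations are scaled up by 1/keep_prob, so no rescaling is needed at test time. A small standalone illustration (not part of the model graph):
with tf.Session() as demo_sess:
    demo = tf.nn.dropout(tf.ones([1, 8]), keep_prob=0.5)
    print(demo_sess.run(demo))   # entries are either 0.0 or 2.0 (= 1 / keep_prob)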
In [14]:
# Final output layer (no softmax here; it is applied inside the loss function)
W_fc2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1))
b_fc2 = tf.Variable(tf.truncated_normal([10], stddev=0.1))
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
In [15]:
# Softmax + cross entropy + L2 + ADAM optimizer
cross_entropy_cnn = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
l2_reg_cnn = tf.nn.l2_loss(W_conv1) + tf.nn.l2_loss(W_conv2) + tf.nn.l2_loss(W_fc1) + tf.nn.l2_loss(W_fc2) # Only decay W's
loss_cnn = cross_entropy_cnn + 0.01 * l2_reg_cnn
In [16]:
# A training step:
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss_cnn) # Returns a tf.Operation
# Define accuracy:
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
In [17]:
saver = tf.train.Saver()
run_time = time.time()
# Run model session:
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())  # Initialization!
    train_size = x_train.shape[0]
    for epoch in range(1, 201):
        i = 0
        while i < train_size:
            if i + 100 < train_size:
                train_step.run(feed_dict={x: x_train[i:i+100, :], y_: y_train[i:i+100], keep_prob: 0.6})
            else:
                train_step.run(feed_dict={x: x_train[i:train_size, :], y_: y_train[i:train_size], keep_prob: 0.6})
            i += 100  # batch size = 100
        if epoch % 10 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: x_train, y_: y_train, keep_prob: 1.0})
            print('Epoch ', epoch, 'training accuracy ', train_accuracy)
    # Save the final model:
    saver.save(sess, 'saved_model.ckpt')
run_time = time.time() - run_time
print('CNN total running time: ', run_time)
Time for predictions!
In [18]:
x_test = np.array(test_raw)
with tf.Session(config=config) as sess:
    # Load the saved model:
    saver.restore(sess, 'saved_model.ckpt')
    # Run the restored model to make predictions:
    y_pred = sess.run(tf.nn.softmax(logits=y_conv), feed_dict={x: x_test, keep_prob: 1.0})
    print(y_pred[0:3, :])
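Feeding the entire test set in one run can be memory-hungry on some machines; if needed, prediction can be chunked the same way training was batched. A sketch that produces the same y_pred as above:
probs = tf.nn.softmax(y_conv)
with tf.Session(config=config) as sess:
    saver.restore(sess, 'saved_model.ckpt')
    y_pred = np.concatenate(
        [sess.run(probs, feed_dict={x: x_test[i:i+1000], keep_prob: 1.0})
         for i in range(0, x_test.shape[0], 1000)],
        axis=0)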
In [19]:
y_output = np.argmax(y_pred, axis=1)
print(y_output)
In [20]:
output_label = {'ImageId': range(1, y_output.shape[0]+1)}
predictions = pd.DataFrame(output_label)
predictions['Label'] = pd.Series(y_output)
predictions.to_csv('predictions.csv', index=False)